# -*- coding: utf-8 -*-
# ----------------------------
# @Time    : 2022/5/29 7:39 PM
# @Author  : changqingai
# @FileName: 05-load_text_data.py
# ----------------------------

import tensorflow as tf


# ********** Load data line by line  ***************
# Text files whose lines are streamed by the datasets in this script.
file_paths = ["./datasets/1.text", "./datasets/2.text"]

# One dataset element per line of text, read from the files in list order.
ds = tf.data.TextLineDataset(filenames=file_paths)

# Preview the first five lines; each element arrives as a raw bytes object.
for raw_line in ds.take(5).as_numpy_iterator():
    print("line:", raw_line)


# ********** Interleave lines from multiple files  ***************
# Dataset of file names; interleave() below expands each name into that
# file's lines.
files_ds = tf.data.Dataset.from_tensor_slices(file_paths)
print("files_ds:", files_ds, list(files_ds.as_numpy_iterator()))

# Cycle over all files, taking one line from each in turn.  The cycle length
# follows the number of files instead of a hard-coded 3, so one interleave
# round is one line per file (while every file still has lines left).
num_files = len(file_paths)
lines_ds = files_ds.interleave(tf.data.TextLineDataset, cycle_length=num_files)

for i, line in enumerate(lines_ds.take(8)):
    # Blank line between rounds so each printed group lines up with one
    # pass over the files.
    if i % num_files == 0:
        print()
    print(line.numpy())


# ********** Filter lines  ***************
def survived(line):
    """Return a boolean tensor that is True unless `line` starts with "0".

    Intended for use as a ``tf.data.Dataset.filter`` predicate.

    Args:
        line: String tensor holding one line of text.

    Returns:
        Boolean tensor that is True when the first byte of `line` is not
        "0", and False otherwise.
    """
    # Use tf.print rather than print(): inside a function traced by tf.data,
    # a Python print() fires only once (at trace time) and shows a symbolic
    # tensor, whereas tf.print emits the actual line every time the predicate
    # is evaluated.  Note that tf.print writes to stderr by default.
    tf.print("survived_line: ", line)
    return tf.not_equal(tf.strings.substr(line, 0, 1), "0")


# Drop the first two lines, then keep only the lines that `survived` accepts.
survivors = ds.skip(count=2).filter(predicate=survived)

# Show up to ten of the retained lines; each element arrives as raw bytes.
for kept_line in survivors.take(10).as_numpy_iterator():
    print("line:", kept_line)
